import docx
import os

def extract_specific_sections(file_path) -> str:
    """Extract the text of selected sections from a Word (.docx) document.

    The paragraphs of the document are scanned in order. A paragraph whose
    text after the last "、" equals one of the target titles opens a new
    section, keyed by the full (stripped) paragraph text. Every following
    non-empty paragraph is collected into that section until another target
    title or the stop heading ("<numbering>、合规改进措施") is reached.

    If the same heading text occurs more than once, the later occurrence
    replaces whatever was collected for the earlier one.

    Args:
        file_path: Passed unchanged to ``docx.Document``.

    Returns:
        The collected sections as one string: each section is its heading
        followed by its paragraphs, one per line, and sections are separated
        by a blank line. Returns an empty string if no target heading is found.
    """
    doc = docx.Document(file_path)
    target_sections = (
        "数据产权证书合规审查及建议",
        "数据资源入表资源范围合规及建议",
        "数据治理合规性审查及建议",
    )
    # Heading (without its numbering prefix) at which collection stops.
    stop_section = "、合规改进措施"
    # Characters allowed in a multi-character numbering prefix ("十一", "10").
    numbering_chars = "0123456789零〇一二三四五六七八九十百"

    def is_stop_heading(text):
        """Return True if *text* is the stop heading with a numbering prefix."""
        if not text.endswith(stop_section):
            return False
        prefix = text[:-len(stop_section)]
        # Any single leading character is accepted (the original behaviour).
        if len(prefix) == 1:
            return True
        # Longer prefixes must be pure numbering, so body text that merely
        # ends with the stop title is not mistaken for the heading.
        return len(prefix) > 1 and all(ch in numbering_chars for ch in prefix)

    sections = {}
    current_section = None

    for para in doc.paragraphs:
        para_text = para.text.strip()
        if para_text.split('、')[-1] in target_sections:
            # Target heading: start (or restart) collecting under this key.
            current_section = para_text
            sections[current_section] = []
        elif is_stop_heading(para_text):
            current_section = None
        elif current_section and para_text:
            # Non-empty body paragraph of the section being collected.
            sections[current_section].append(para_text)

    return '\n\n'.join(
        f"{title}\n" + '\n'.join(lines) for title, lines in sections.items()
    )

# Resolve the directory one level above the folder that contains this script.
_script_dir = os.path.dirname(os.path.abspath(__file__))
directory_path = os.path.dirname(_script_dir)

# The input document is expected under "<directory_path>/file/".
file_path = os.path.join(directory_path, 'file', '合规法律意见书.docx')

# Extract the target sections and write them to standard output.
extracted_content = extract_specific_sections(file_path)
print(extracted_content)